#############################################################################################################
## Olivia Chi & Aaron Dow
## GOV2001
## Replication Code
#############################################################################################################

setwd("C:/Users/olc317/Documents/GOV2001")

rm(list = ls())
ls()

library(foreign)
library(MatchIt)
library(cem)
library(Zelig)
library(sandwich)
library(lmtest)
library(stargazer)
library(mvtnorm)
library(Rlab)

########################
# Load data
########################

data <- read.dta("data_for_analysis.dta")

## Clustered Standard Errors
cl   <- function(dat,fm, cluster){
  attach(dat, warn.conflicts = F)
  library(sandwich)
  M <- length(unique(cluster))   
  N <- length(cluster)            
  K <- fm$rank                 
  dfc <- (M/(M-1))*((N-1)/(N-K))  
  uj  <- apply(estfun(fm),2, function(x) tapply(x, cluster, sum));
  vcovCL <- dfc*sandwich(fm, meat=crossprod(uj)/N)
  coeftest(fm, vcovCL) }

# sample for original analysis
sample <- data[data$dist_from_cut >= -0.6 & data$dist_from_cut <=0.6,]
# Create a variable for the next GPA, imputing from nextGPA variable
sample$nextGPA_temp <- sample$nextGPA + sample$gpacutoff
# Create binary indicator for improving GPA in next term
sample$improve_GPA[sample$nextGPA_temp > sample$GPA_year1] <- 1
sample$improve_GPA[sample$nextGPA_temp <= sample$GPA_year1] <-0

# Imbalance between treatment and control in original analysis
plot(density(sample$hsgrade_pct[sample$gpalscutoff==1]),col="red",
     xlim = c(0,100),
     main = "Density of HS Grade Percentile",
     lty=2,
     xlab = "HS Grade Percentile")
lines(density(sample$hsgrade_pct[sample$gpalscutoff==0]),col="blue")
legend(55,.022, bty="n",
       c("Good Standing","Under Probation"),
       lty=c(1,2),
       lwd=c(1,1),col=c("blue","red")) 


###################################
# Table 1 Summary Statistics
###################################

# Function to display mean and SD
meanstdv <- function(vector) {
  m <- mean(vector, na.rm=TRUE)
  s <- sd(vector, na.rm=TRUE)
  return(c(m,s))
}

# Output displays results in Table 1

# Characteristics
meanstdv(sample$hsgrade_pct)
meanstdv(sample$totcredits_year1)
meanstdv(sample$age_at_entry)
meanstdv(sample$male)
meanstdv(sample$english)
meanstdv(sample$bpl_north_america)
meanstdv(sample$loc_campus1)
meanstdv(sample$loc_campus2)
meanstdv(sample$loc_campus3)

# Outcomes
meanstdv(sample$dist_from_cut)
meanstdv(sample$probation_year1)
meanstdv(sample$probation_ever)
meanstdv(sample$left_school)
meanstdv(sample$nextGPA)
meanstdv(sample$suspended_ever)
meanstdv(sample$gradin4)
meanstdv(sample$gradin5)
meanstdv(sample$gradin6)

# Subset of data for summary statistics
summary_stat_data <- cbind(sample$hsgrade_pct,sample$totcredits_year1, sample$age_at_entry, sample$male, 
                           sample$english, sample$bpl_north_america, sample$loc_campus1, sample$loc_campus2,
                           sample$loc_campus3, sample$dist_from_cut, sample$probation_year1, sample$probation_ever, 
                           sample$left_school, sample$nextGPA, sample$suspended_ever, sample$gradin4, sample$gradin5,
                           sample$gradin6)

summary_stat_data <- as.data.frame(summary_stat_data)

variable.names<-c("High school grade percentile", "Credits attempted in first year", "Age at entry" , "Male", "English is first language",
                  "Born in North America", "At Campus 1", "At Campus 2", "At Campus 3", "Distance from cutoff in 1st year", 
                  "On probation after 1st year", "Ever on academic probation", "Left university after 1st evaluation", 
                  "Distance from cutoff at next evaluation", "Ever suspended", "Graduated by year 4", 
                  "Graduated by year 5", "Graduated by year 6")

colnames(summary_stat_data) <- variable.names

stargazer(summary_stat_data, digits=2, title="SUMMARY STATISTICS", nobs=F, min.max=F)


test1 <- lm(left_school ~ gpalscutoff + gpaXgpalscutoff + gpaXgpagrcutoff,
            data = sample)
cl(sample, test1, sample$clustervar)


#####################################################################
# Table 2 - Estimated Discontinuities in Observable Characteristics
#####################################################################

# Set up regression models
col1 <- lm(hsgrade_pct ~ gpalscutoff + gpaXgpalscutoff + gpaXgpagrcutoff,
           data = sample)
col2 <- lm(totcredits_year1 ~ gpalscutoff + gpaXgpalscutoff + gpaXgpagrcutoff,
           data = sample)
col3 <- lm(age_at_entry ~ gpalscutoff + gpaXgpalscutoff + gpaXgpagrcutoff,
           data = sample)
col4 <- lm(male ~ gpalscutoff + gpaXgpalscutoff + gpaXgpagrcutoff,
           data = sample)
col5 <- lm(bpl_north_america ~ gpalscutoff + gpaXgpalscutoff + gpaXgpagrcutoff,
           data = sample)
col6 <- lm(english ~ gpalscutoff + gpaXgpalscutoff + gpaXgpagrcutoff,
           data = sample)
col7 <- lm(loc_campus1 ~ gpalscutoff + gpaXgpalscutoff + gpaXgpagrcutoff,
           data = sample)
col8 <- lm(loc_campus2 ~ gpalscutoff + gpaXgpalscutoff + gpaXgpagrcutoff,
           data = sample)
col9 <- lm(loc_campus3 ~ gpalscutoff + gpaXgpalscutoff + gpaXgpagrcutoff,
           data = sample)

# Output displays results in Table 2

cl1 <- cl(sample, col1, sample$clustervar)
cl2 <- cl(sample, col2, sample$clustervar)
cl3 <- cl(sample, col3, sample$clustervar)
cl4 <- cl(sample, col4, sample$clustervar)
cl5 <- cl(sample, col5, sample$clustervar)
cl6 <- cl(sample, col6, sample$clustervar)
cl7 <- cl(sample, col7, sample$clustervar)
cl8 <- cl(sample, col8, sample$clustervar)
cl9 <- cl(sample, col9, sample$clustervar)

se <- list(cl1[,2],cl2[,2],cl3[,2],cl4[,2],cl5[,2],cl6[,2],cl7[,2],cl8[,2],cl9[,2])
p <- list(cl1[,4],cl2[,4],cl3[,4],cl4[,4],cl5[,4],cl6[,4],cl7[,4],cl8[,4],cl9[,4])
stargazer(col1,col2,col3,col4,col5,col6,col7,col8,col9, se=se, p=p)


###############################################################################
# Table 3 - Estimated Discontinuity in Probation Status
# Panel A: Dependent variable: On academic probation after first evaluation
###############################################################################

# Create subgroups
lowHS <- sample[sample$lowHS == 1, ]
highHS <- sample[sample$highHS == 1, ]
male <- sample[sample$male == 1, ]
female <- sample[sample$female == 1, ]
english <- sample[sample$english == 1, ]
noenglish <- sample[sample$noenglish == 1, ]

# Set up regression models
table3col1 <- lm(probation_year1 ~ gpalscutoff + gpaXgpalscutoff + gpaXgpagrcutoff,
                 data = sample)

table3col2 <- lm(probation_year1 ~ gpalscutoff + gpaXgpalscutoff + gpaXgpagrcutoff,
                 data = lowHS)

table3col3 <- lm(probation_year1 ~ gpalscutoff + gpaXgpalscutoff + gpaXgpagrcutoff,
                 data = highHS)

table3col4 <- lm(probation_year1 ~ gpalscutoff + gpaXgpalscutoff + gpaXgpagrcutoff,
                 data = male)

table3col5 <- lm(probation_year1 ~ gpalscutoff + gpaXgpalscutoff + gpaXgpagrcutoff,
                 data = female)

table3col6 <- lm(probation_year1 ~ gpalscutoff + gpaXgpalscutoff + gpaXgpagrcutoff,
                 data = english)

table3col7 <- lm(probation_year1 ~ gpalscutoff + gpaXgpalscutoff + gpaXgpagrcutoff,
                 data = noenglish)


# Output displays results in Table 3 Panel A
cl31 <- cl(sample, table3col1, sample$clustervar)
cl32 <- cl(lowHS, table3col2, lowHS$clustervar)
cl33 <- cl(lowHS, table3col3, highHS$clustervar)
cl34 <- cl(male, table3col4, male$clustervar)
cl35 <- cl(female, table3col5, female$clustervar)
cl36 <- cl(english, table3col6, english$clustervar)
cl37 <- cl(noenglish, table3col7, noenglish$clustervar)


se <- list(cl31[,2],cl32[,2],cl33[,2],cl34[,2],cl35[,2],cl36[,2],cl37[,2])
p <- list(cl31[,4],cl32[,4],cl33[,4],cl34[,4],cl35[,4],cl36[,4],cl37[,4])
stargazer(table3col1,table3col2,table3col3,table3col4,table3col5,table3col6,table3col7, se=se, p=p)

###############################################################################
# Table 3 - Estimated Discontinuity in Probation Status
# Panel B: Dependent variable: Ever on academic probation
###############################################################################

# Set up regression models
table3col1b <- lm(probation_ever ~ gpalscutoff + gpaXgpalscutoff + gpaXgpagrcutoff,
                  data = sample)

table3col2b <- lm(probation_ever ~ gpalscutoff + gpaXgpalscutoff + gpaXgpagrcutoff,
                  data = lowHS)

table3col3b <- lm(probation_ever ~ gpalscutoff + gpaXgpalscutoff + gpaXgpagrcutoff,
                  data = highHS)

table3col4b <- lm(probation_ever ~ gpalscutoff + gpaXgpalscutoff + gpaXgpagrcutoff,
                  data = male)

table3col5b <- lm(probation_ever ~ gpalscutoff + gpaXgpalscutoff + gpaXgpagrcutoff,
                  data = female)

table3col6b <- lm(probation_ever ~ gpalscutoff + gpaXgpalscutoff + gpaXgpagrcutoff,
                  data = english)

table3col7b <- lm(probation_ever ~ gpalscutoff + gpaXgpalscutoff + gpaXgpagrcutoff,
                  data = noenglish)

# Output displays results in Table 3 Panel B
cl31b <- cl(sample, table3col1b, sample$clustervar)
cl32b <- cl(lowHS, table3col2b, lowHS$clustervar)
cl33b <- cl(lowHS, table3col3b, highHS$clustervar)
cl34b <- cl(male, table3col4b, male$clustervar)
cl35b <- cl(female, table3col5b, female$clustervar)
cl36b <- cl(english, table3col6b, english$clustervar)
cl37b <- cl(noenglish, table3col7b, noenglish$clustervar)

se <- list(cl31b[,2],cl32b[,2],cl33b[,2],cl34b[,2],cl35b[,2],cl36b[,2],cl37b[,2])
p <- list(cl31b[,4],cl32b[,4],cl33b[,4],cl34b[,4],cl35b[,4],cl36b[,4],cl37b[,4])
stargazer(table3col1b,table3col2b,table3col3b,table3col4b,table3col5b,table3col6b,table3col7b, se=se, p=p)


###############################################################################
# Table 4 - Estimated Effect on the Decision to Leave After the First Evaluation
###############################################################################

# Set up regression models
table4col1 <- lm(left_school ~ gpalscutoff + gpaXgpalscutoff + gpaXgpagrcutoff,
                 data = sample)

table4col2 <- lm(left_school ~ gpalscutoff + gpaXgpalscutoff + gpaXgpagrcutoff,
                 data = lowHS)

table4col3 <- lm(left_school ~ gpalscutoff + gpaXgpalscutoff + gpaXgpagrcutoff,
                 data = highHS)


table4col4 <-lm(left_school ~ gpalscutoff + gpaXgpalscutoff + gpaXgpagrcutoff,
                data = male)


table4col5 <-lm(left_school ~ gpalscutoff + gpaXgpalscutoff + gpaXgpagrcutoff,
                data = female)


table4col6 <-lm(left_school ~ gpalscutoff + gpaXgpalscutoff + gpaXgpagrcutoff,
                data = english)


table4col7 <-lm(left_school ~ gpalscutoff + gpaXgpalscutoff + gpaXgpagrcutoff,
                data = noenglish)

# Output displays results in Table 4
cl41 <- cl(sample, table4col1, sample$clustervar)
cl42 <- cl(lowHS, table4col2, lowHS$clustervar )
cl43 <- cl(highHS, table4col3, highHS$clustervar)
cl44 <- cl(male, table4col4, male$clustervar)
cl45 <- cl(female, table4col5, female$clustervar)
cl46 <- cl(english, table4col6, english$clustervar)
cl47 <- cl(noenglish, table4col7, noenglish$clustervar)


se <- list(cl41[,2],cl42[,2],cl43[,2],cl44[,2],cl45[,2],cl46[,2],cl47[,2])
p <- list(cl41[,4],cl42[,4],cl43[,4],cl44[,4],cl45[,4],cl46[,4],cl47[,4])
stargazer(table4col1,table4col2,table4col3,table4col4,table4col5,table4col6,table4col7, se=se, p=p)



###############################################################################
# Table 5 - Estimated Discontinuities in Subsequent GPA
# Panel A: Dependent variable: Next term GPA
###############################################################################

# Set up the subsets for each column & set up regression models
# All (1)
sample2 <- subset(sample, select = c(nextGPA, gpalscutoff,
                                                 gpaXgpalscutoff, gpaXgpagrcutoff,
                                                 clustervar))
sample2 <- na.omit(sample2)

table5col1 <- lm(nextGPA ~ gpalscutoff + gpaXgpalscutoff + gpaXgpagrcutoff,
                 data = sample2)


# HS grades < median
lowHS2 <- subset(lowHS, select = c(nextGPA, gpalscutoff,
                                   gpaXgpalscutoff, gpaXgpagrcutoff,
                                   clustervar))
lowHS2 <- na.omit(lowHS2)
table5col2 <- lm(nextGPA ~ gpalscutoff + gpaXgpalscutoff + gpaXgpagrcutoff,
                 data = lowHS2)


# HS grades > median
highHS2 <- subset(highHS, select = c(nextGPA, gpalscutoff,
                                     gpaXgpalscutoff, gpaXgpagrcutoff,
                                     clustervar))
highHS2 <- na.omit(highHS2)
table5col3 <- lm(nextGPA ~ gpalscutoff + gpaXgpalscutoff + gpaXgpagrcutoff,
                 data = highHS2)

# Male
male2 <- subset(male, select = c(nextGPA, gpalscutoff,
                                 gpaXgpalscutoff, gpaXgpagrcutoff,
                                 clustervar))
male2 <- na.omit(male2)
table5col4 <- lm(nextGPA ~ gpalscutoff + gpaXgpalscutoff + gpaXgpagrcutoff,
                 data = male2)


# Female
female2 <- subset(female, select = c(nextGPA, gpalscutoff,
                                     gpaXgpalscutoff, gpaXgpagrcutoff,
                                     clustervar))
female2 <- na.omit(female2)
table5col5 <- lm(nextGPA ~ gpalscutoff + gpaXgpalscutoff + gpaXgpagrcutoff,
                 data = female2)


# Native English
english2 <- subset(english, select = c(nextGPA, gpalscutoff,
                                       gpaXgpalscutoff, gpaXgpagrcutoff,
                                       clustervar))
english2 <- na.omit(english2)
table5col6 <- lm(nextGPA ~ gpalscutoff + gpaXgpalscutoff + gpaXgpagrcutoff,
                 data = english2)


# Nonnative English
noenglish2 <- subset(noenglish, select = c(nextGPA, gpalscutoff,
                                           gpaXgpalscutoff, gpaXgpagrcutoff,
                                           clustervar))
noenglish2 <- na.omit(noenglish2)
table5col7 <- lm(nextGPA ~ gpalscutoff + gpaXgpalscutoff + gpaXgpagrcutoff,
                 data = noenglish2)


# Output displays results in Table 5 Panel A
cl51 <- cl(sample2, table5col1, sample2$clustervar)
cl52 <- cl(lowHS2, table5col2, lowHS2$clustervar)
cl53 <- cl(highHS2, table5col3, highHS2$clustervar)
cl54 <- cl(male2, table5col4, male2$clustervar)
cl55 <- cl(female2, table5col5, female2$clustervar)
cl56 <- cl(english2, table5col6, english2$clustervar)
cl57 <- cl(noenglish2, table5col7, noenglish2$clustervar)


se <- list(cl51[,2],cl52[,2],cl53[,2],cl54[,2],cl55[,2],cl56[,2],cl57[,2])
p <- list(cl51[,4],cl52[,4],cl53[,4],cl54[,4],cl55[,4],cl56[,4],cl57[,4])
stargazer(table5col1,table5col2,table5col3,table5col4,table5col5,table5col6,table5col7, se=se, p=p)


###############################################################################
# Table 5 - Estimated Discontinuities in Subsequent GPA
# Panel B: Dependent variable: Probability of improving GPA in next term
###############################################################################

# The "nextGPA" variable seems to have been transformed from an actual GPA
# to the next GPA's distance from the cutoff.  Reverse the operation to get 
# the next GPA, and then create binary indicator on whether or not it was 
# greater than GPA_year1.

# Due to loss of accuracy from using "nextGPA" variable, which seems to have been
# rounded, results do not perfectly match results in article.

# Set up the subsets for each column & set up regression models
sample2b <- subset(sample, select = c(improve_GPA, gpalscutoff,
                                                  gpaXgpalscutoff, gpaXgpagrcutoff,
                                                  clustervar))
sample2b <- na.omit(sample2b)

table5col1b <- lm(improve_GPA ~ gpalscutoff + gpaXgpalscutoff + gpaXgpagrcutoff,
                  data = sample2b)

# HS grades < median
lowHS2b <- subset(lowHS, select = c(improve_GPA, gpalscutoff,
                                    gpaXgpalscutoff, gpaXgpagrcutoff,
                                    clustervar))
lowHS2b <- na.omit(lowHS2b)
table5col2b <- lm(improve_GPA ~ gpalscutoff + gpaXgpalscutoff + gpaXgpagrcutoff,
                  data = lowHS2b)


# HS grades > median
highHS2b <- subset(highHS, select = c(improve_GPA, gpalscutoff,
                                      gpaXgpalscutoff, gpaXgpagrcutoff,
                                      clustervar))
highHS2b <- na.omit(highHS2b)
table5col3b <- lm(improve_GPA ~ gpalscutoff + gpaXgpalscutoff + gpaXgpagrcutoff,
                  data = highHS2b)

# Male
male2b <- subset(male, select = c(improve_GPA, gpalscutoff,
                                  gpaXgpalscutoff, gpaXgpagrcutoff,
                                  clustervar))
male2b <- na.omit(male2b)
table5col4b <- lm(improve_GPA ~ gpalscutoff + gpaXgpalscutoff + gpaXgpagrcutoff,
                  data = male2b)


# Female
female2b <- subset(female, select = c(improve_GPA, gpalscutoff,
                                      gpaXgpalscutoff, gpaXgpagrcutoff,
                                      clustervar))
female2b <- na.omit(female2b)
table5col5b <- lm(improve_GPA ~ gpalscutoff + gpaXgpalscutoff + gpaXgpagrcutoff,
                  data = female2b)


# Native English
english2b <- subset(english, select = c(improve_GPA, gpalscutoff,
                                        gpaXgpalscutoff, gpaXgpagrcutoff,
                                        clustervar))
english2b <- na.omit(english2b)
table5col6b <- lm(improve_GPA ~ gpalscutoff + gpaXgpalscutoff + gpaXgpagrcutoff,
                  data = english2b)


# Nonnative English
noenglish2b <- subset(noenglish, select = c(improve_GPA, gpalscutoff,
                                            gpaXgpalscutoff, gpaXgpagrcutoff,
                                            clustervar))
noenglish2b <- na.omit(noenglish2b)
table5col7b <- lm(improve_GPA ~ gpalscutoff + gpaXgpalscutoff + gpaXgpagrcutoff,
                  data = noenglish2b)


# Output displays results in Table 5 Panel B
cl51b <- cl(sample2b, table5col1b, sample2b$clustervar)
cl52b <- cl(lowHS2b, table5col2b, lowHS2b$clustervar)
cl53b <- cl(highHS2b, table5col3b, highHS2b$clustervar)
cl54b <- cl(male2b, table5col4b, male2b$clustervar)
cl55b <- cl(female2b, table5col5b, female2b$clustervar)
cl56b <- cl(english2b, table5col6b, english2b$clustervar)
cl57b <- cl(noenglish2b, table5col7b, noenglish2b$clustervar)


se <- list(cl51b[,2],cl52b[,2],cl53b[,2],cl54b[,2],cl55b[,2],cl56b[,2],cl57b[,2])
p <- list(cl51b[,4],cl52b[,4],cl53b[,4],cl54b[,4],cl55b[,4],cl56b[,4],cl57b[,4])
stargazer(table5col1b,table5col2b,table5col3b,table5col4b,table5col5b,table5col6b,table5col7b, se=se, p=p)


###############################################################################
# Table 6 - Estimated Effects on Graduation
# Panel A: Dependent variable: graduated after four years
###############################################################################

# Set up the subsets for each column & set up regression models

# All
sample6a <- subset(sample, select = c(gradin4, gpalscutoff,
                                                  gpaXgpalscutoff, gpaXgpagrcutoff,
                                                  clustervar))
sample6a <- na.omit(sample6a)
table6col1 <- lm(gradin4 ~ gpalscutoff + gpaXgpalscutoff + gpaXgpagrcutoff,
                 data = sample6a)


# HS grades < median
lowHS6a <- subset(lowHS, select = c(gradin4, gpalscutoff,
                                    gpaXgpalscutoff, gpaXgpagrcutoff,
                                    clustervar))
lowHS6a <- na.omit(lowHS6a)
table6col2 <- lm(gradin4 ~ gpalscutoff + gpaXgpalscutoff + gpaXgpagrcutoff,
                 data = lowHS6a)


# HS grades > median
highHS6a <- subset(highHS, select = c(gradin4, gpalscutoff,
                                      gpaXgpalscutoff, gpaXgpagrcutoff,
                                      clustervar))
highHS6a <- na.omit(highHS6a)
table6col3 <- lm(gradin4 ~ gpalscutoff + gpaXgpalscutoff + gpaXgpagrcutoff,
                 data = highHS6a)


# Male
male6a <- subset(male, select = c(gradin4, gpalscutoff,
                                  gpaXgpalscutoff, gpaXgpagrcutoff,
                                  clustervar))
male6a <- na.omit(male6a)
table6col4 <- lm(gradin4 ~ gpalscutoff + gpaXgpalscutoff + gpaXgpagrcutoff,
                 data = male6a)


# Female
female6a <- subset(female, select = c(gradin4, gpalscutoff,
                                      gpaXgpalscutoff, gpaXgpagrcutoff,
                                      clustervar))
female6a <- na.omit(female6a)
table6col5 <- lm(gradin4 ~ gpalscutoff + gpaXgpalscutoff + gpaXgpagrcutoff,
                 data = female6a)


# English
english6a <- subset(english, select = c(gradin4, gpalscutoff,
                                        gpaXgpalscutoff, gpaXgpagrcutoff,
                                        clustervar))
english6a <- na.omit(english6a)
table6col6 <- lm(gradin4 ~ gpalscutoff + gpaXgpalscutoff + gpaXgpagrcutoff,
                 data = english6a)


# Nonnative English
noenglish6a <- subset(noenglish, select = c(gradin4, gpalscutoff,
                                            gpaXgpalscutoff, gpaXgpagrcutoff,
                                            clustervar))
noenglish6a <- na.omit(noenglish6a)
table6col7 <- lm(gradin4 ~ gpalscutoff + gpaXgpalscutoff + gpaXgpagrcutoff,
                 data = noenglish6a)

# Output displays results in Table 6 Panel A
cl61a <- cl(sample6a, table6col1, sample6a$clustervar)
cl62a <- cl(lowHS6a, table6col2, lowHS6a$clustervar)
cl63a <- cl(highHS6a, table6col3, highHS6a$clustervar)
cl64a <- cl(male6a, table6col4, male6a$clustervar)
cl65a <- cl(female6a, table6col5, female6a$clustervar)
cl66a <- cl(english6a, table6col6, english6a$clustervar)
cl67a <- cl(noenglish6a, table6col7, noenglish6a$clustervar)


se <- list(cl61a[,2],cl62a[,2],cl63a[,2],cl64a[,2],cl65a[,2],cl66a[,2],cl67a[,2])
p <- list(cl61a[,4],cl62a[,4],cl63a[,4],cl64a[,4],cl65a[,4],cl66a[,4],cl67a[,4])
stargazer(table6col1,table6col2,table6col3,table6col4,table6col5,table6col6,table6col7, se=se, p=p)


###############################################################################
# Table 6 - Estimated Effects on Graduation
# Panel B: Dependent variable: graduated after five years
###############################################################################

# Set up the subsets for each column & set up regression models

# All
sample6b <- subset(sample, select = c(gradin5, gpalscutoff,
                                                  gpaXgpalscutoff, gpaXgpagrcutoff,
                                                  clustervar))
sample6b <- na.omit(sample6b)
table6col1b <- lm(gradin5 ~ gpalscutoff + gpaXgpalscutoff + gpaXgpagrcutoff,
                  data = sample6b)


# HS grades < median
lowHS6b <- subset(lowHS, select = c(gradin5, gpalscutoff,
                                    gpaXgpalscutoff, gpaXgpagrcutoff,
                                    clustervar))
lowHS6b <- na.omit(lowHS6b)
table6col2b <- lm(gradin5 ~ gpalscutoff + gpaXgpalscutoff + gpaXgpagrcutoff,
                  data = lowHS6b)


# HS grades > median
highHS6b <- subset(highHS, select = c(gradin5, gpalscutoff,
                                      gpaXgpalscutoff, gpaXgpagrcutoff,
                                      clustervar))
highHS6b <- na.omit(highHS6b)
table6col3b <- lm(gradin5 ~ gpalscutoff + gpaXgpalscutoff + gpaXgpagrcutoff,
                  data = highHS6b)


# Male
male6b <- subset(male, select = c(gradin5, gpalscutoff,
                                  gpaXgpalscutoff, gpaXgpagrcutoff,
                                  clustervar))
male6b <- na.omit(male6b)
table6col4b <- lm(gradin5 ~ gpalscutoff + gpaXgpalscutoff + gpaXgpagrcutoff,
                  data = male6b)


# Female
female6b <- subset(female, select = c(gradin5, gpalscutoff,
                                      gpaXgpalscutoff, gpaXgpagrcutoff,
                                      clustervar))
female6b <- na.omit(female6b)
table6col5b <- lm(gradin5 ~ gpalscutoff + gpaXgpalscutoff + gpaXgpagrcutoff,
                  data = female6b)


# English
english6b <- subset(english, select = c(gradin5, gpalscutoff,
                                        gpaXgpalscutoff, gpaXgpagrcutoff,
                                        clustervar))
english6b <- na.omit(english6b)
table6col6b <- lm(gradin5 ~ gpalscutoff + gpaXgpalscutoff + gpaXgpagrcutoff,
                  data = english6b)


# Nonnative English
noenglish6b <- subset(noenglish, select = c(gradin5, gpalscutoff,
                                            gpaXgpalscutoff, gpaXgpagrcutoff,
                                            clustervar))
noenglish6b <- na.omit(noenglish6b)
table6col7b <- lm(gradin5 ~ gpalscutoff + gpaXgpalscutoff + gpaXgpagrcutoff,
                  data = noenglish6b)

# Output displays results in Table 6 Panel B
cl61b <- cl(sample6b, table6col1b, sample6b$clustervar)
cl62b <- cl(lowHS6b, table6col2b, lowHS6b$clustervar)
cl63b <- cl(highHS6b, table6col3b, highHS6b$clustervar)
cl64b <- cl(male6b, table6col4b, male6b$clustervar)
cl65b <- cl(female6b, table6col5b, female6b$clustervar)
cl66b <- cl(english6b, table6col6b, english6b$clustervar)
cl67b <- cl(noenglish6b, table6col7b, noenglish6b$clustervar)


se <- list(cl61b[,2],cl62b[,2],cl63b[,2],cl64b[,2],cl65b[,2],cl66b[,2],cl67b[,2])
p <- list(cl61b[,4],cl62b[,4],cl63b[,4],cl64b[,4],cl65b[,4],cl66b[,4],cl67b[,4])
stargazer(table6col1b,table6col2b,table6col3b,table6col4b,table6col5b,table6col6b,table6col7b, se=se, p=p)


###############################################################################
# Table 6 - Estimated Effects on Graduation
# Panel C: Dependent variable: graduated after six years
###############################################################################

# Set up the subsets for each column & set up regression models
# All
sample6c <- subset(sample, select = c(gradin6, gpalscutoff,
                                                  gpaXgpalscutoff, gpaXgpagrcutoff,
                                                  clustervar))
sample6c <- na.omit(sample6c)
table6col1c <- lm(gradin6 ~ gpalscutoff + gpaXgpalscutoff + gpaXgpagrcutoff,
                  data = sample6c)

# HS grades < median
lowHS6c <- subset(lowHS, select = c(gradin6, gpalscutoff,
                                    gpaXgpalscutoff, gpaXgpagrcutoff,
                                    clustervar))
lowHS6c <- na.omit(lowHS6c)
table6col2c <- lm(gradin6 ~ gpalscutoff + gpaXgpalscutoff + gpaXgpagrcutoff,
                  data = lowHS6c)


# HS grades > median
highHS6c <- subset(highHS, select = c(gradin6, gpalscutoff,
                                      gpaXgpalscutoff, gpaXgpagrcutoff,
                                      clustervar))
highHS6c <- na.omit(highHS6c)
table6col3c <- lm(gradin6 ~ gpalscutoff + gpaXgpalscutoff + gpaXgpagrcutoff,
                  data = highHS6c)


# Male
male6c <- subset(male, select = c(gradin6, gpalscutoff,
                                  gpaXgpalscutoff, gpaXgpagrcutoff,
                                  clustervar))
male6c <- na.omit(male6c)
table6col4c <- lm(gradin6 ~ gpalscutoff + gpaXgpalscutoff + gpaXgpagrcutoff,
                  data = male6c)



# Female
female6c <- subset(female, select = c(gradin6, gpalscutoff,
                                      gpaXgpalscutoff, gpaXgpagrcutoff,
                                      clustervar))
female6c <- na.omit(female6c)
table6col5c <- lm(gradin6 ~ gpalscutoff + gpaXgpalscutoff + gpaXgpagrcutoff,
                  data = female6c)



# English
english6c <- subset(english, select = c(gradin6, gpalscutoff,
                                        gpaXgpalscutoff, gpaXgpagrcutoff,
                                        clustervar))
english6c <- na.omit(english6c)
table6col6c <- lm(gradin6 ~ gpalscutoff + gpaXgpalscutoff + gpaXgpagrcutoff,
                  data = english6c)

# Nonnative English
noenglish6c <- subset(noenglish, select = c(gradin6, gpalscutoff,
                                            gpaXgpalscutoff, gpaXgpagrcutoff,
                                            clustervar))
noenglish6c <- na.omit(noenglish6c)
table6col7c <- lm(gradin6 ~ gpalscutoff + gpaXgpalscutoff + gpaXgpagrcutoff,
                  data = noenglish6c)

# Output displays results in Table 6 Panel C
cl61c <- cl(sample6c, table6col1c, sample6c$clustervar)
cl62c <- cl(lowHS6c, table6col2c, lowHS6c$clustervar)
cl63c <- cl(highHS6c, table6col3c, highHS6c$clustervar)
cl64c <- cl(male6c, table6col4c, male6c$clustervar)
cl65c <- cl(female6c, table6col5c, female6c$clustervar)
cl66c <- cl(english6c, table6col6c, english6c$clustervar)
cl67c <- cl(noenglish6c, table6col7c, noenglish6c$clustervar)


se <- list(cl61c[,2],cl62c[,2],cl63c[,2],cl64c[,2],cl65c[,2],cl66c[,2],cl67c[,2])
p <- list(cl61c[,4],cl62c[,4],cl63c[,4],cl64c[,4],cl65c[,4],cl66c[,4],cl67c[,4])
stargazer(table6col1c,table6col2c,table6col3c,table6col4c,table6col5c,table6col6c,table6col7c, se=se, p=p)

#####################################################
# Simulations
# Left Schools
#####################################################


#############
# ALL
#############

# Subset the data
myvars <- c("left_school", "gpalscutoff", 
            "gpaXgpalscutoff", "gpaXgpagrcutoff",
            "male", "totcredits_year1",
            "hsgrade_pct", "age_at_entry", "english",
            "loc_campus1", "loc_campus3", "bpl_north_america",
            "weights")
data.for.sim <- sample[myvars]

# Delete rows containing missing data
data.for.sim <- na.omit(data.for.sim)

# Log-likelihood with weights
llnormal <- function(param,y,x,w) {
  cov <- x %*% param[1:ncol(x)]
  sigma <- exp(param[ncol(x)+1])
  -.5* sum( log(2*pi) + log((sigma^2)/w) + w*((y-cov)^2)/(sigma^2))
}

y <- data.for.sim$left_school
X <- data.for.sim[,c("gpalscutoff", 
                     "gpaXgpalscutoff", "gpaXgpagrcutoff")]
Xmat <- as.matrix(cbind(1,X))
W <- data.for.sim$weights

opt <- optim(par = rep(0.01,5), fn = llnormal, y=y, x=Xmat,
            w=W,
             method = "BFGS", control = list(fnscale = -1),
            hessian = TRUE)
opt$par
stderrors <- diag(sqrt(solve(-opt$hessian)))
stderrors

# Simulate betas
varcv <- -solve(opt$hessian)
set.seed(02138)
library(mvtnorm)
sims <- rmvnorm(1000, mean = opt$par, sigma = varcv)

# Under probation
predCovs.treatment <- c(1,1,-0.01,0)

# NOT Under Probation
predCovs.control <- c(1,0,0,0.01)

# Get expected values
pred.pi1 <- apply(sims[1:1000,1:4],1,function(x)
  predCovs.treatment%*%x)
pred.pi0 <- apply(sims[1:1000,1:4],1,function(x)
  predCovs.control%*%x)

# Plot expected values
par(xpd=FALSE)
plot(density(pred.pi0), col="blue",
     main = "All",
     xlab = "Proportion leaving school",
     ylim = c(0,60),
     xlim = c(0,.12),
     sub= "") 
lines(density(pred.pi1), lty=2, col = "red")
legend(.2,50, 
       c("Good Standing","Under Probation"),
       lty=c(1,2),
       lwd=c(2.5,2.5),col=c("blue","red")) 



#####################
# HS Grades < median
#####################

# Subset the data
myvars <- c("left_school", "gpalscutoff", 
            "gpaXgpalscutoff", "gpaXgpagrcutoff",
            "male", "totcredits_year1",
            "hsgrade_pct", "age_at_entry", "english",
            "loc_campus1", "loc_campus3", "bpl_north_america",
            "weights", "lowHS", "highHS")
data.for.sim <- sample[myvars]

# Subset for high HS
data.for.sim <- data.for.sim[data.for.sim$lowHS==1,]

# Log-likelihood with weights
llnormal <- function(param,y,x,w) {
  cov <- x %*% param[1:ncol(x)]
  sigma <- exp(param[ncol(x)+1])
  -.5* sum( log(2*pi) + log((sigma^2)/w) + w*((y-cov)^2)/(sigma^2))
}

y <- data.for.sim$left_school
X <- data.for.sim[,c("gpalscutoff", 
                     "gpaXgpalscutoff", "gpaXgpagrcutoff")]
Xmat <- as.matrix(cbind(1,X))
W <- data.for.sim$weights

opt <- optim(par = rep(0.01,5), fn = llnormal, y=y, x=Xmat,
             w=W,
             method = "BFGS", control = list(fnscale = -1),
             hessian = TRUE)
opt$par
stderrors <- diag(sqrt(solve(-opt$hessian)))
stderrors

# Simulate betas
varcv <- -solve(opt$hessian)
set.seed(02138)
library(mvtnorm)
sims <- rmvnorm(1000, mean = opt$par, sigma = varcv)

# Under probation
predCovs.treatment <- c(1,1,-0.01,0)

# NOT Under Probation
predCovs.control <- c(1,0,0,0.01)

# Get expected values
pred.pi1.low <- apply(sims[1:1000,1:4],1,function(x)
  predCovs.treatment%*%x)
pred.pi0.low <- apply(sims[1:1000,1:4],1,function(x)
  predCovs.control%*%x)

# Plot expected values
plot(density(pred.pi0.low), col="blue",
     main = "HS grades < median",
     xlab = "Proportion leaving school",
     ylim = c(0,60),
     xlim = c(0,.12),
     sub= "") 
lines(density(pred.pi1.low), lty=2, col = "red")
legend(.2,50, 
       c("Good Standing","Under Probation"),
       lty=c(1,2),
       lwd=c(2.5,2.5),col=c("blue","red")) 


#####################
# HS Grades > median
#####################

# Subset the data
myvars <- c("left_school", "gpalscutoff", 
            "gpaXgpalscutoff", "gpaXgpagrcutoff",
            "male", "totcredits_year1",
            "hsgrade_pct", "age_at_entry", "english",
            "loc_campus1", "loc_campus3", "bpl_north_america",
            "weights", "lowHS", "highHS")
data.for.sim <- sample[myvars]

# Subset for high HS
data.for.sim <- data.for.sim[data.for.sim$highHS==1,]

# Log-likelihood with weights
llnormal <- function(param,y,x,w) {
  cov <- x %*% param[1:ncol(x)]
  sigma <- exp(param[ncol(x)+1])
  -.5* sum( log(2*pi) + log((sigma^2)/w) + w*((y-cov)^2)/(sigma^2))
}

y <- data.for.sim$left_school
X <- data.for.sim[,c("gpalscutoff", 
                     "gpaXgpalscutoff", "gpaXgpagrcutoff")]
Xmat <- as.matrix(cbind(1,X))
W <- data.for.sim$weights

opt <- optim(par = rep(0.01,5), fn = llnormal, y=y, x=Xmat,
             w=W,
             method = "BFGS", control = list(fnscale = -1),
             hessian = TRUE)
opt$par
stderrors <- diag(sqrt(solve(-opt$hessian)))
stderrors

# Simulate betas
varcv <- -solve(opt$hessian)
set.seed(02138)
library(mvtnorm)
sims <- rmvnorm(1000, mean = opt$par, sigma = varcv)

# Under probation
predCovs.treatment <- c(1,1,-0.01,0)

# NOT Under Probation
predCovs.control <- c(1,0,0,0.01)

# Get expected values
pred.pi1.high <- apply(sims[1:1000,1:4],1,function(x)
  predCovs.treatment%*%x)
pred.pi0.high <- apply(sims[1:1000,1:4],1,function(x)
  predCovs.control%*%x)

# Plot expected values
plot(density(pred.pi0.high), col="blue",
     main = "HS grades > median",
     xlab = "Proportion leaving school",
     ylim = c(0,60),
     xlim = c(0,.12),
     sub= "") 
lines(density(pred.pi1.high), lty=2, col = "red")
legend(.2,50, 
       c("Good Standing","Under Probation"),
       lty=c(1,2),
       lwd=c(2.5,2.5),col=c("blue","red")) 



#####################
# Male
#####################

# Subset the data
myvars <- c("left_school", "gpalscutoff", 
            "gpaXgpalscutoff", "gpaXgpagrcutoff",
            "male", "totcredits_year1",
            "hsgrade_pct", "age_at_entry", "english",
            "loc_campus1", "loc_campus3", "bpl_north_america",
            "weights", "lowHS", "highHS")
data.for.sim <- sample[myvars]

# Subset for high HS
data.for.sim <- data.for.sim[data.for.sim$male==1,]

# Log-likelihood with weights
llnormal <- function(param,y,x,w) {
  cov <- x %*% param[1:ncol(x)]
  sigma <- exp(param[ncol(x)+1])
  -.5* sum( log(2*pi) + log((sigma^2)/w) + w*((y-cov)^2)/(sigma^2))
}

y <- data.for.sim$left_school
X <- data.for.sim[,c("gpalscutoff", 
                     "gpaXgpalscutoff", "gpaXgpagrcutoff")]
Xmat <- as.matrix(cbind(1,X))
W <- data.for.sim$weights

opt <- optim(par = rep(0.01,5), fn = llnormal, y=y, x=Xmat,
             w=W,
             method = "BFGS", control = list(fnscale = -1),
             hessian = TRUE)
opt$par
stderrors <- diag(sqrt(solve(-opt$hessian)))
stderrors

# Simulate betas
varcv <- -solve(opt$hessian)
set.seed(02138)
library(mvtnorm)
sims <- rmvnorm(1000, mean = opt$par, sigma = varcv)

# Under probation
predCovs.treatment <- c(1,1,-0.01,0)

# NOT Under Probation
predCovs.control <- c(1,0,0,0.01)

# Get expected values
pred.pi1.male <- apply(sims[1:1000,1:4],1,function(x)
  predCovs.treatment%*%x)
pred.pi0.male <- apply(sims[1:1000,1:4],1,function(x)
  predCovs.control%*%x)

# Plot expected values
plot(density(pred.pi0.male), col="blue",
     main = "Males",
     xlab = "Proportion leaving school",
     ylim = c(0,60),
     xlim = c(0,.12),
     sub= "") 
lines(density(pred.pi1.male), lty=2, col = "red")
legend(.2,50, 
       c("Good Standing","Under Probation"),
       lty=c(1,2),
       lwd=c(2.5,2.5),col=c("blue","red")) 




#####################
# Female
#####################

# Subset the data
myvars <- c("left_school", "gpalscutoff", 
            "gpaXgpalscutoff", "gpaXgpagrcutoff",
            "male", "totcredits_year1",
            "hsgrade_pct", "age_at_entry", "english",
            "loc_campus1", "loc_campus3", "bpl_north_america",
            "weights", "lowHS", "highHS")
data.for.sim <- sample[myvars]

# Subset for high HS
data.for.sim <- data.for.sim[data.for.sim$male==0,]

# Log-likelihood with weights
llnormal <- function(param,y,x,w) {
  cov <- x %*% param[1:ncol(x)]
  sigma <- exp(param[ncol(x)+1])
  -.5* sum( log(2*pi) + log((sigma^2)/w) + w*((y-cov)^2)/(sigma^2))
}

y <- data.for.sim$left_school
X <- data.for.sim[,c("gpalscutoff", 
                     "gpaXgpalscutoff", "gpaXgpagrcutoff")]
Xmat <- as.matrix(cbind(1,X))
W <- data.for.sim$weights

opt <- optim(par = rep(0.01,5), fn = llnormal, y=y, x=Xmat,
             w=W,
             method = "BFGS", control = list(fnscale = -1),
             hessian = TRUE)
opt$par
stderrors <- diag(sqrt(solve(-opt$hessian)))
stderrors

# Simulate betas
varcv <- -solve(opt$hessian)
set.seed(02138)
library(mvtnorm)
sims <- rmvnorm(1000, mean = opt$par, sigma = varcv)

# Under probation
predCovs.treatment <- c(1,1,-0.01,0)

# NOT Under Probation
predCovs.control <- c(1,0,0,0.01)

# Get expected values
pred.pi1.female <- apply(sims[1:1000,1:4],1,function(x)
  predCovs.treatment%*%x)
pred.pi0.female <- apply(sims[1:1000,1:4],1,function(x)
  predCovs.control%*%x)

# Plot expected values
plot(density(pred.pi0.female), col="blue",
     main = "Females",
     xlab = "Proportion leaving school",
     ylim = c(0,60),
     xlim = c(0,.12),
     sub= "") 
lines(density(pred.pi1.female), lty=2, col = "red")
legend(.2,50, 
       c("Good Standing","Under Probation"),
       lty=c(1,2),
       lwd=c(2.5,2.5),col=c("blue","red")) 



#####################
# Native English
#####################

# Subset the data
myvars <- c("left_school", "gpalscutoff", 
            "gpaXgpalscutoff", "gpaXgpagrcutoff",
            "male", "totcredits_year1",
            "hsgrade_pct", "age_at_entry", "english",
            "loc_campus1", "loc_campus3", "bpl_north_america",
            "weights", "lowHS", "highHS")
data.for.sim <- sample[myvars]

# Subset for high HS
data.for.sim <- data.for.sim[data.for.sim$english==1,]

# Log-likelihood with weights
llnormal <- function(param,y,x,w) {
  cov <- x %*% param[1:ncol(x)]
  sigma <- exp(param[ncol(x)+1])
  -.5* sum( log(2*pi) + log((sigma^2)/w) + w*((y-cov)^2)/(sigma^2))
}

y <- data.for.sim$left_school
X <- data.for.sim[,c("gpalscutoff", 
                     "gpaXgpalscutoff", "gpaXgpagrcutoff")]
Xmat <- as.matrix(cbind(1,X))
W <- data.for.sim$weights

opt <- optim(par = rep(0.01,5), fn = llnormal, y=y, x=Xmat,
             w=W,
             method = "BFGS", control = list(fnscale = -1),
             hessian = TRUE)
opt$par
stderrors <- diag(sqrt(solve(-opt$hessian)))
stderrors

# Simulate betas
varcv <- -solve(opt$hessian)
set.seed(02138)
library(mvtnorm)
sims <- rmvnorm(1000, mean = opt$par, sigma = varcv)

# Under probation
predCovs.treatment <- c(1,1,-0.01,0)

# NOT Under Probation
predCovs.control <- c(1,0,0,0.01)

# Get expected values
pred.pi1.english <- apply(sims[1:1000,1:4],1,function(x)
  predCovs.treatment%*%x)
pred.pi0.english <- apply(sims[1:1000,1:4],1,function(x)
  predCovs.control%*%x)

# Plot expected values
plot(density(pred.pi0.english), col="blue",
     main = "Native English",
     xlab = "Proportion leaving school",
     ylim = c(0,60),
     xlim = c(0,.12),
     sub= "") 
lines(density(pred.pi1.english), lty=2, col = "red")
legend(.2,50, 
       c("Good Standing","Under Probation"),
       lty=c(1,2),
       lwd=c(2.5,2.5),col=c("blue","red")) 




#####################
# Nonnative English
#####################

# Subset the data
myvars <- c("left_school", "gpalscutoff", 
            "gpaXgpalscutoff", "gpaXgpagrcutoff",
            "male", "totcredits_year1",
            "hsgrade_pct", "age_at_entry", "english",
            "loc_campus1", "loc_campus3", "bpl_north_america",
            "weights", "lowHS", "highHS")
data.for.sim <- sample[myvars]

# Subset for high HS
data.for.sim <- data.for.sim[data.for.sim$english==0,]

# Log-likelihood with weights
llnormal <- function(param,y,x,w) {
  cov <- x %*% param[1:ncol(x)]
  sigma <- exp(param[ncol(x)+1])
  -.5* sum( log(2*pi) + log((sigma^2)/w) + w*((y-cov)^2)/(sigma^2))
}

y <- data.for.sim$left_school
X <- data.for.sim[,c("gpalscutoff", 
                     "gpaXgpalscutoff", "gpaXgpagrcutoff")]
Xmat <- as.matrix(cbind(1,X))
W <- data.for.sim$weights

opt <- optim(par = rep(0.01,5), fn = llnormal, y=y, x=Xmat,
             w=W,
             method = "BFGS", control = list(fnscale = -1),
             hessian = TRUE)
opt$par
stderrors <- diag(sqrt(solve(-opt$hessian)))
stderrors

# Simulate betas
varcv <- -solve(opt$hessian)
set.seed(02138)
library(mvtnorm)
sims <- rmvnorm(1000, mean = opt$par, sigma = varcv)

# Under probation
predCovs.treatment <- c(1,1,-0.01,0)

# NOT Under Probation
predCovs.control <- c(1,0,0,0.01)

# Get expected values
pred.pi1.nonenglish <- apply(sims[1:1000,1:4],1,function(x)
  predCovs.treatment%*%x)
pred.pi0.nonenglish <- apply(sims[1:1000,1:4],1,function(x)
  predCovs.control%*%x)

# Plot expected values
plot(density(pred.pi0.nonenglish), col="blue",
     main = "Nonnative English",
     xlab = "Proportion leaving school",
     ylim = c(0,60),
     xlim = c(0,.12),
     sub= "") 
lines(density(pred.pi1.nonenglish), lty=2, col = "red")
legend(.2,50, 
       c("Good Standing","Under Probation"),
       lty=c(1,2),
       lwd=c(2.5,2.5),col=c("blue","red")) 


par( mfrow = c( 3, 2 ) )
plot(density(pred.pi0), col="blue",
     main = "All",
     xlab = "Proportion leaving school",
     ylim = c(0,60),
     xlim = c(0,.12),
     sub= "") 
lines(density(pred.pi1), lty=2, col = "red")
#plot.new()
plot(density(pred.pi0.low), col="blue",
     main = "HS grades < median",
     xlab = "Proportion leaving school",
     ylim = c(0,60),
     xlim = c(0,.12),
     sub= "") 
lines(density(pred.pi1.low), lty=2, col = "red")
plot(density(pred.pi0.high), col="blue",
     main = "HS grades > median",
     xlab = "Proportion leaving school",
     ylim = c(0,60),
     xlim = c(0,.12),
     sub= "") 
lines(density(pred.pi1.high), lty=2, col = "red")
plot(density(pred.pi0.male), col="blue",
     main = "Males",
     xlab = "Proportion leaving school",
     ylim = c(0,60),
     xlim = c(0,.12),
     sub= "") 
lines(density(pred.pi1.male), lty=2, col = "red")
plot(density(pred.pi0.female), col="blue",
     main = "Females",
     xlab = "Proportion leaving school",
     ylim = c(0,60),
     xlim = c(0,.12),
     sub= "") 
lines(density(pred.pi1.female), lty=2, col = "red")
plot(density(pred.pi0.english), col="blue",
     main = "Native English",
     xlab = "Proportion leaving school",
     ylim = c(0,60),
     xlim = c(0,.12),
     sub= "") 
lines(density(pred.pi1.english), lty=2, col = "red")
plot(density(pred.pi0.nonenglish), col="blue",
     main = "Nonnative English",
     xlab = "Proportion leaving school",
     ylim = c(0,60),
     xlim = c(0,.12),
     sub= "") 
lines(density(pred.pi1.nonenglish), lty=2, col = "red")
dev.off()


#####################################################
# Simulations
# Subsequent GPA
#####################################################


#############
# ALL
#############

# Subset the data
myvars <- c("left_school", "gpalscutoff", 
            "gpaXgpalscutoff", "gpaXgpagrcutoff",
            "male", "totcredits_year1",
            "hsgrade_pct", "age_at_entry", "english",
            "loc_campus1", "loc_campus3", "bpl_north_america",
            "weights","nextGPA")
data.for.sim <- sample[myvars]

# Delete rows containing missing data
data.for.sim <- na.omit(data.for.sim)

# Log-likelihood with weights
llnormal <- function(param,y,x,w) {
  cov <- x %*% param[1:ncol(x)]
  sigma <- exp(param[ncol(x)+1])
  -.5* sum( log(2*pi) + log((sigma^2)/w) + w*((y-cov)^2)/(sigma^2))
}

y <- data.for.sim$nextGPA
X <- data.for.sim[,c("gpalscutoff", 
                     "gpaXgpalscutoff", "gpaXgpagrcutoff")]
Xmat <- as.matrix(cbind(1,X))
W <- data.for.sim$weights

opt <- optim(par = rep(0.01,5), fn = llnormal, y=y, x=Xmat,
             w=W,
             method = "BFGS", control = list(fnscale = -1),
             hessian = TRUE)
opt$par
stderrors <- diag(sqrt(solve(-opt$hessian)))
stderrors

# Simulate betas
varcv <- -solve(opt$hessian)
set.seed(02138)
library(mvtnorm)
sims <- rmvnorm(1000, mean = opt$par, sigma = varcv)

# Under probation
predCovs.treatment <- c(1,1,-0.01,0)

# NOT Under Probation
predCovs.control <- c(1,0,0,0.01)

# Get expected values
pred.pi1 <- apply(sims[1:1000,1:4],1,function(x)
  predCovs.treatment%*%x)
pred.pi0 <- apply(sims[1:1000,1:4],1,function(x)
  predCovs.control%*%x)

# Plot expected values
plot(density(pred.pi0), col="blue",
     main = "All",
     xlab = "Subsequent GPA minus probation cutoff",
     ylim = c(0,20),
     xlim = c(0,1),
     sub= "") 
lines(density(pred.pi1), lty=2, col = "red")



#####################
# HS Grades < median
#####################

# Subset the data
myvars <- c("left_school", "gpalscutoff", 
            "gpaXgpalscutoff", "gpaXgpagrcutoff",
            "male", "totcredits_year1",
            "hsgrade_pct", "age_at_entry", "english",
            "loc_campus1", "loc_campus3", "bpl_north_america",
            "weights", "lowHS", "highHS","nextGPA")
data.for.sim <- sample[myvars]

data.for.sim <- na.omit(data.for.sim)

# Subset for high HS
data.for.sim <- data.for.sim[data.for.sim$lowHS==1,]

# Log-likelihood with weights
llnormal <- function(param,y,x,w) {
  cov <- x %*% param[1:ncol(x)]
  sigma <- exp(param[ncol(x)+1])
  -.5* sum( log(2*pi) + log((sigma^2)/w) + w*((y-cov)^2)/(sigma^2))
}

y <- data.for.sim$nextGPA
X <- data.for.sim[,c("gpalscutoff", 
                     "gpaXgpalscutoff", "gpaXgpagrcutoff")]
Xmat <- as.matrix(cbind(1,X))
W <- data.for.sim$weights

opt <- optim(par = rep(0.01,5), fn = llnormal, y=y, x=Xmat,
             w=W,
             method = "BFGS", control = list(fnscale = -1),
             hessian = TRUE)
opt$par
stderrors <- diag(sqrt(solve(-opt$hessian)))
stderrors

# Simulate betas
varcv <- -solve(opt$hessian)
set.seed(02138)
library(mvtnorm)
sims <- rmvnorm(1000, mean = opt$par, sigma = varcv)

# Under probation
predCovs.treatment <- c(1,1,-0.01,0)

# NOT Under Probation
predCovs.control <- c(1,0,0,0.01)

# Get expected values
pred.pi1.low <- apply(sims[1:1000,1:4],1,function(x)
  predCovs.treatment%*%x)
pred.pi0.low <- apply(sims[1:1000,1:4],1,function(x)
  predCovs.control%*%x)

# Plot expected values
plot(density(pred.pi0.low), col="blue",
     main = "HS grades < median",
     xlab = "Subsequent GPA minus probation cutoff",
     ylim = c(0,20),
     xlim = c(0,1),
     sub= "") 
lines(density(pred.pi1.low), lty=2, col = "red")


#####################
# HS Grades > median
#####################

# Subset the data
myvars <- c("left_school", "gpalscutoff", 
            "gpaXgpalscutoff", "gpaXgpagrcutoff",
            "male", "totcredits_year1",
            "hsgrade_pct", "age_at_entry", "english",
            "loc_campus1", "loc_campus3", "bpl_north_america",
            "weights", "lowHS", "highHS", "nextGPA")
data.for.sim <- sample[myvars]

data.for.sim <- na.omit(data.for.sim)

# Subset for high HS
data.for.sim <- data.for.sim[data.for.sim$highHS==1,]

# Log-likelihood with weights
llnormal <- function(param,y,x,w) {
  cov <- x %*% param[1:ncol(x)]
  sigma <- exp(param[ncol(x)+1])
  -.5* sum( log(2*pi) + log((sigma^2)/w) + w*((y-cov)^2)/(sigma^2))
}

y <- data.for.sim$nextGPA
X <- data.for.sim[,c("gpalscutoff", 
                     "gpaXgpalscutoff", "gpaXgpagrcutoff")]
Xmat <- as.matrix(cbind(1,X))
W <- data.for.sim$weights

opt <- optim(par = rep(0.01,5), fn = llnormal, y=y, x=Xmat,
             w=W,
             method = "BFGS", control = list(fnscale = -1),
             hessian = TRUE)
opt$par
stderrors <- diag(sqrt(solve(-opt$hessian)))
stderrors

# Simulate betas
varcv <- -solve(opt$hessian)
set.seed(02138)
library(mvtnorm)
sims <- rmvnorm(1000, mean = opt$par, sigma = varcv)

# Under probation
predCovs.treatment <- c(1,1,-0.01,0)

# NOT Under Probation
predCovs.control <- c(1,0,0,0.01)

# Get expected values
pred.pi1.high <- apply(sims[1:1000,1:4],1,function(x)
  predCovs.treatment%*%x)
pred.pi0.high <- apply(sims[1:1000,1:4],1,function(x)
  predCovs.control%*%x)

# Plot expected values
plot(density(pred.pi0.high), col="blue",
     main = "HS grades > median",
     xlab = "Subsequent GPA minus probation cutoff",
     ylim = c(0,20),
     xlim = c(0,1),
     sub= "") 
lines(density(pred.pi1.high), lty=2, col = "red")




#####################
# Male
#####################

# Subset the data
myvars <- c("left_school", "gpalscutoff", 
            "gpaXgpalscutoff", "gpaXgpagrcutoff",
            "male", "totcredits_year1",
            "hsgrade_pct", "age_at_entry", "english",
            "loc_campus1", "loc_campus3", "bpl_north_america",
            "weights", "lowHS", "highHS", "nextGPA")
data.for.sim <- sample[myvars]

data.for.sim <- na.omit(data.for.sim)

# Subset for high HS
data.for.sim <- data.for.sim[data.for.sim$male==1,]

# Log-likelihood with weights
llnormal <- function(param,y,x,w) {
  cov <- x %*% param[1:ncol(x)]
  sigma <- exp(param[ncol(x)+1])
  -.5* sum( log(2*pi) + log((sigma^2)/w) + w*((y-cov)^2)/(sigma^2))
}

y <- data.for.sim$nextGPA
X <- data.for.sim[,c("gpalscutoff", 
                     "gpaXgpalscutoff", "gpaXgpagrcutoff")]
Xmat <- as.matrix(cbind(1,X))
W <- data.for.sim$weights

opt <- optim(par = rep(0.01,5), fn = llnormal, y=y, x=Xmat,
             w=W,
             method = "BFGS", control = list(fnscale = -1),
             hessian = TRUE)
opt$par
stderrors <- diag(sqrt(solve(-opt$hessian)))
stderrors

# Simulate betas
varcv <- -solve(opt$hessian)
set.seed(02138)
library(mvtnorm)
sims <- rmvnorm(1000, mean = opt$par, sigma = varcv)

# Under probation
predCovs.treatment <- c(1,1,-0.01,0)

# NOT Under Probation
predCovs.control <- c(1,0,0,0.01)

# Get expected values
pred.pi1.male <- apply(sims[1:1000,1:4],1,function(x)
  predCovs.treatment%*%x)
pred.pi0.male <- apply(sims[1:1000,1:4],1,function(x)
  predCovs.control%*%x)

# Plot expected values
plot(density(pred.pi0.male), col="blue",
     main = "Males",
     xlab = "Subsequent GPA minus probation cutoff",
     ylim = c(0,20),
     xlim = c(0,1),
     sub= "") 
lines(density(pred.pi1.male), lty=2, col = "red")



#####################
# Female
#####################

# Subset the data
myvars <- c("left_school", "gpalscutoff", 
            "gpaXgpalscutoff", "gpaXgpagrcutoff",
            "male", "totcredits_year1",
            "hsgrade_pct", "age_at_entry", "english",
            "loc_campus1", "loc_campus3", "bpl_north_america",
            "weights", "lowHS", "highHS", "nextGPA")
data.for.sim <- sample[myvars]

data.for.sim <- na.omit(data.for.sim)

# Subset for high HS
data.for.sim <- data.for.sim[data.for.sim$male==0,]

# Log-likelihood with weights
llnormal <- function(param,y,x,w) {
  cov <- x %*% param[1:ncol(x)]
  sigma <- exp(param[ncol(x)+1])
  -.5* sum( log(2*pi) + log((sigma^2)/w) + w*((y-cov)^2)/(sigma^2))
}

y <- data.for.sim$nextGPA
X <- data.for.sim[,c("gpalscutoff", 
                     "gpaXgpalscutoff", "gpaXgpagrcutoff")]
Xmat <- as.matrix(cbind(1,X))
W <- data.for.sim$weights

opt <- optim(par = rep(0.01,5), fn = llnormal, y=y, x=Xmat,
             w=W,
             method = "BFGS", control = list(fnscale = -1),
             hessian = TRUE)
opt$par
stderrors <- diag(sqrt(solve(-opt$hessian)))
stderrors

# Simulate betas
varcv <- -solve(opt$hessian)
set.seed(02138)
library(mvtnorm)
sims <- rmvnorm(1000, mean = opt$par, sigma = varcv)

# Under probation
predCovs.treatment <- c(1,1,-0.01,0)

# NOT Under Probation
predCovs.control <- c(1,0,0,0.01)

# Get expected values
pred.pi1.female <- apply(sims[1:1000,1:4],1,function(x)
  predCovs.treatment%*%x)
pred.pi0.female <- apply(sims[1:1000,1:4],1,function(x)
  predCovs.control%*%x)

# Plot expected values
plot(density(pred.pi0.female), col="blue",
     main = "Females",
     xlab = "Subsequent GPA minus probation cutoff",
     ylim = c(0,20),
     xlim = c(0,1),
     sub= "") 
lines(density(pred.pi1.female), lty=2, col = "red")



#####################
# Native English
#####################

# Subset the data
myvars <- c("left_school", "gpalscutoff", 
            "gpaXgpalscutoff", "gpaXgpagrcutoff",
            "male", "totcredits_year1",
            "hsgrade_pct", "age_at_entry", "english",
            "loc_campus1", "loc_campus3", "bpl_north_america",
            "weights", "lowHS", "highHS", "nextGPA")
data.for.sim <- sample[myvars]

data.for.sim <- na.omit(data.for.sim)

# Subset for high HS
data.for.sim <- data.for.sim[data.for.sim$english==1,]

# Log-likelihood with weights
llnormal <- function(param,y,x,w) {
  cov <- x %*% param[1:ncol(x)]
  sigma <- exp(param[ncol(x)+1])
  -.5* sum( log(2*pi) + log((sigma^2)/w) + w*((y-cov)^2)/(sigma^2))
}

y <- data.for.sim$nextGPA
X <- data.for.sim[,c("gpalscutoff", 
                     "gpaXgpalscutoff", "gpaXgpagrcutoff")]
Xmat <- as.matrix(cbind(1,X))
W <- data.for.sim$weights

opt <- optim(par = rep(0.01,5), fn = llnormal, y=y, x=Xmat,
             w=W,
             method = "BFGS", control = list(fnscale = -1),
             hessian = TRUE)
opt$par
stderrors <- diag(sqrt(solve(-opt$hessian)))
stderrors

# Simulate betas
varcv <- -solve(opt$hessian)
set.seed(02138)
library(mvtnorm)
sims <- rmvnorm(1000, mean = opt$par, sigma = varcv)

# Under probation
predCovs.treatment <- c(1,1,-0.01,0)

# NOT Under Probation
predCovs.control <- c(1,0,0,0.01)

# Get expected values
pred.pi1.english <- apply(sims[1:1000,1:4],1,function(x)
  predCovs.treatment%*%x)
pred.pi0.english <- apply(sims[1:1000,1:4],1,function(x)
  predCovs.control%*%x)

# Plot expected values
plot(density(pred.pi0.english), col="blue",
     main = "Native English",
     xlab = "Subsequent GPA minus probation cutoff",
     ylim = c(0,20),
     xlim = c(0,1),
     sub= "") 
lines(density(pred.pi1.english), lty=2, col = "red")



#####################
# Nonnative English
#####################

# Subset the data
myvars <- c("left_school", "gpalscutoff", 
            "gpaXgpalscutoff", "gpaXgpagrcutoff",
            "male", "totcredits_year1",
            "hsgrade_pct", "age_at_entry", "english",
            "loc_campus1", "loc_campus3", "bpl_north_america",
            "weights", "lowHS", "highHS", "nextGPA")
data.for.sim <- sample[myvars]

data.for.sim <- na.omit(data.for.sim)


# Subset for high HS
data.for.sim <- data.for.sim[data.for.sim$english==0,]

# Log-likelihood with weights
llnormal <- function(param,y,x,w) {
  cov <- x %*% param[1:ncol(x)]
  sigma <- exp(param[ncol(x)+1])
  -.5* sum( log(2*pi) + log((sigma^2)/w) + w*((y-cov)^2)/(sigma^2))
}

y <- data.for.sim$nextGPA
X <- data.for.sim[,c("gpalscutoff", 
                     "gpaXgpalscutoff", "gpaXgpagrcutoff")]
Xmat <- as.matrix(cbind(1,X))
W <- data.for.sim$weights

opt <- optim(par = rep(0.01,5), fn = llnormal, y=y, x=Xmat,
             w=W,
             method = "BFGS", control = list(fnscale = -1),
             hessian = TRUE)
opt$par
stderrors <- diag(sqrt(solve(-opt$hessian)))
stderrors

# Simulate betas
varcv <- -solve(opt$hessian)
set.seed(02138)
library(mvtnorm)
sims <- rmvnorm(1000, mean = opt$par, sigma = varcv)

# Under probation
predCovs.treatment <- c(1,1,-0.01,0)

# NOT Under Probation
predCovs.control <- c(1,0,0,0.01)

# Get expected values
pred.pi1.nonenglish <- apply(sims[1:1000,1:4],1,function(x)
  predCovs.treatment%*%x)
pred.pi0.nonenglish <- apply(sims[1:1000,1:4],1,function(x)
  predCovs.control%*%x)

# Plot expected values
plot(density(pred.pi0.nonenglish), col="blue",
     main = "Nonnative English",
     xlab = "Subsequent GPA minus probation cutoff",
     ylim = c(0,20),
     xlim = c(0,1),
     sub= "") 
lines(density(pred.pi1.nonenglish), lty=2, col = "red")



par( mfrow = c( 3, 2 ) )
plot(density(pred.pi0), col="blue",
     main = "All",
     xlab = "Subsequent GPA minus probation cutoff",
     ylim = c(0,20),
     xlim = c(0,1),
     sub= "") 
lines(density(pred.pi1), lty=2, col = "red")
#plot.new()
plot(density(pred.pi0.low), col="blue",
     main = "HS grades < median",
     xlab = "Subsequent GPA minus probation cutoff",
     ylim = c(0,20),
     xlim = c(0,1),
     sub= "") 
lines(density(pred.pi1.low), lty=2, col = "red")
plot(density(pred.pi0.high), col="blue",
     main = "HS grades > median",
     xlab = "Subsequent GPA minus probation cutoff",
     ylim = c(0,20),
     xlim = c(0,1),
     sub= "") 
lines(density(pred.pi1.high), lty=2, col = "red")
plot(density(pred.pi0.male), col="blue",
     main = "Males",
     xlab = "Subsequent GPA minus probation cutoff",
     ylim = c(0,20),
     xlim = c(0,1),
     sub= "") 
lines(density(pred.pi1.male), lty=2, col = "red")
plot(density(pred.pi0.female), col="blue",
     main = "Females",
     xlab = "Subsequent GPA minus probation cutoff",
     ylim = c(0,20),
     xlim = c(0,1),
     sub= "") 
lines(density(pred.pi1.female), lty=2, col = "red")
plot(density(pred.pi0.english), col="blue",
     main = "Native English",
     xlab = "Subsequent GPA minus probation cutoff",
     ylim = c(0,20),
     xlim = c(0,1),
     sub= "") 
lines(density(pred.pi1.english), lty=2, col = "red")
plot(density(pred.pi0.nonenglish), col="blue",
     main = "Nonnative English",
     xlab = "Subsequent GPA minus probation cutoff",
     ylim = c(0,20),
     xlim = c(0,1),
     sub= "") 
lines(density(pred.pi1.nonenglish), lty=2, col = "red")
dev.off()


#####################################################
# Simulations
# Graduate in 6 years
#####################################################


#############
# ALL
#############

# Subset the data
myvars <- c("left_school", "gpalscutoff", 
            "gpaXgpalscutoff", "gpaXgpagrcutoff",
            "male", "totcredits_year1",
            "hsgrade_pct", "age_at_entry", "english",
            "loc_campus1", "loc_campus3", "bpl_north_america",
            "weights","gradin6")
data.for.sim <- sample[myvars]

# Delete rows containing missing data
data.for.sim <- na.omit(data.for.sim)

# Log-likelihood with weights
llnormal <- function(param,y,x,w) {
  cov <- x %*% param[1:ncol(x)]
  sigma <- exp(param[ncol(x)+1])
  -.5* sum( log(2*pi) + log((sigma^2)/w) + w*((y-cov)^2)/(sigma^2))
}

y <- data.for.sim$gradin6
X <- data.for.sim[,c("gpalscutoff", 
                     "gpaXgpalscutoff", "gpaXgpagrcutoff")]
Xmat <- as.matrix(cbind(1,X))
W <- data.for.sim$weights

opt <- optim(par = rep(0.01,5), fn = llnormal, y=y, x=Xmat,
             w=W,
             method = "BFGS", control = list(fnscale = -1),
             hessian = TRUE)
opt$par
stderrors <- diag(sqrt(solve(-opt$hessian)))
stderrors

# Simulate betas
varcv <- -solve(opt$hessian)
set.seed(02138)
library(mvtnorm)
sims <- rmvnorm(1000, mean = opt$par, sigma = varcv)

# Under probation
predCovs.treatment <- c(1,1,-0.01,0)

# NOT Under Probation
predCovs.control <- c(1,0,0,0.01)

# Get expected values
pred.pi1 <- apply(sims[1:1000,1:4],1,function(x)
  predCovs.treatment%*%x)
pred.pi0 <- apply(sims[1:1000,1:4],1,function(x)
  predCovs.control%*%x)

# Plot expected values
plot(density(pred.pi0), col="blue",
     main = "All",
     xlab = "Proportion graduating within 6 years",
     ylim = c(0,20),
     xlim = c(.5,.8),
     sub= "") 
lines(density(pred.pi1), lty=2, col = "red")


#####################
# HS Grades < median
#####################

# Subset the data
myvars <- c("left_school", "gpalscutoff", 
            "gpaXgpalscutoff", "gpaXgpagrcutoff",
            "male", "totcredits_year1",
            "hsgrade_pct", "age_at_entry", "english",
            "loc_campus1", "loc_campus3", "bpl_north_america",
            "weights", "lowHS", "highHS","gradin6")
data.for.sim <- sample[myvars]

data.for.sim <- na.omit(data.for.sim)

# Subset for high HS
data.for.sim <- data.for.sim[data.for.sim$lowHS==1,]

# Log-likelihood with weights
llnormal <- function(param,y,x,w) {
  cov <- x %*% param[1:ncol(x)]
  sigma <- exp(param[ncol(x)+1])
  -.5* sum( log(2*pi) + log((sigma^2)/w) + w*((y-cov)^2)/(sigma^2))
}

y <- data.for.sim$gradin6
X <- data.for.sim[,c("gpalscutoff", 
                     "gpaXgpalscutoff", "gpaXgpagrcutoff")]
Xmat <- as.matrix(cbind(1,X))
W <- data.for.sim$weights

opt <- optim(par = rep(0.01,5), fn = llnormal, y=y, x=Xmat,
             w=W,
             method = "BFGS", control = list(fnscale = -1),
             hessian = TRUE)
opt$par
stderrors <- diag(sqrt(solve(-opt$hessian)))
stderrors

# Simulate betas
varcv <- -solve(opt$hessian)
set.seed(02138)
library(mvtnorm)
sims <- rmvnorm(1000, mean = opt$par, sigma = varcv)

# Under probation
predCovs.treatment <- c(1,1,-0.01,0)

# NOT Under Probation
predCovs.control <- c(1,0,0,0.01)

# Get expected values
pred.pi1.low <- apply(sims[1:1000,1:4],1,function(x)
  predCovs.treatment%*%x)
pred.pi0.low <- apply(sims[1:1000,1:4],1,function(x)
  predCovs.control%*%x)

# Plot expected values
plot(density(pred.pi0.low), col="blue",
     main = "HS grades < median",
     xlab = "Proportion graduating within 6 years",
     ylim = c(0,20),
     xlim = c(.5,.8),
     sub= "") 
lines(density(pred.pi1.low), lty=2, col = "red")


#####################
# HS Grades > median
#####################

# Subset the data
myvars <- c("left_school", "gpalscutoff", 
            "gpaXgpalscutoff", "gpaXgpagrcutoff",
            "male", "totcredits_year1",
            "hsgrade_pct", "age_at_entry", "english",
            "loc_campus1", "loc_campus3", "bpl_north_america",
            "weights", "lowHS", "highHS", "gradin6")
data.for.sim <- sample[myvars]

data.for.sim <- na.omit(data.for.sim)

# Subset for high HS
data.for.sim <- data.for.sim[data.for.sim$highHS==1,]

# Log-likelihood with weights
llnormal <- function(param,y,x,w) {
  cov <- x %*% param[1:ncol(x)]
  sigma <- exp(param[ncol(x)+1])
  -.5* sum( log(2*pi) + log((sigma^2)/w) + w*((y-cov)^2)/(sigma^2))
}

y <- data.for.sim$gradin6
X <- data.for.sim[,c("gpalscutoff", 
                     "gpaXgpalscutoff", "gpaXgpagrcutoff")]
Xmat <- as.matrix(cbind(1,X))
W <- data.for.sim$weights

opt <- optim(par = rep(0.01,5), fn = llnormal, y=y, x=Xmat,
             w=W,
             method = "BFGS", control = list(fnscale = -1),
             hessian = TRUE)
opt$par
stderrors <- diag(sqrt(solve(-opt$hessian)))
stderrors

# Simulate betas
varcv <- -solve(opt$hessian)
set.seed(02138)
library(mvtnorm)
sims <- rmvnorm(1000, mean = opt$par, sigma = varcv)

# Under probation
predCovs.treatment <- c(1,1,-0.01,0)

# NOT Under Probation
predCovs.control <- c(1,0,0,0.01)

# Get expected values
pred.pi1.high <- apply(sims[1:1000,1:4],1,function(x)
  predCovs.treatment%*%x)
pred.pi0.high <- apply(sims[1:1000,1:4],1,function(x)
  predCovs.control%*%x)

# Plot expected values
plot(density(pred.pi0.high), col="blue",
     main = "HS grades > median",
     xlab = "Proportion graduating within 6 years",
     ylim = c(0,20),
     xlim = c(.5,.8),
     sub= "") 
lines(density(pred.pi1.high), lty=2, col = "red")




#####################
# Male
#####################

# Subset the data
myvars <- c("left_school", "gpalscutoff", 
            "gpaXgpalscutoff", "gpaXgpagrcutoff",
            "male", "totcredits_year1",
            "hsgrade_pct", "age_at_entry", "english",
            "loc_campus1", "loc_campus3", "bpl_north_america",
            "weights", "lowHS", "highHS", "gradin6")
data.for.sim <- sample[myvars]

data.for.sim <- na.omit(data.for.sim)

# Subset for high HS
data.for.sim <- data.for.sim[data.for.sim$male==1,]

# Log-likelihood with weights
llnormal <- function(param,y,x,w) {
  cov <- x %*% param[1:ncol(x)]
  sigma <- exp(param[ncol(x)+1])
  -.5* sum( log(2*pi) + log((sigma^2)/w) + w*((y-cov)^2)/(sigma^2))
}

y <- data.for.sim$gradin6
X <- data.for.sim[,c("gpalscutoff", 
                     "gpaXgpalscutoff", "gpaXgpagrcutoff")]
Xmat <- as.matrix(cbind(1,X))
W <- data.for.sim$weights

opt <- optim(par = rep(0.01,5), fn = llnormal, y=y, x=Xmat,
             w=W,
             method = "BFGS", control = list(fnscale = -1),
             hessian = TRUE)
opt$par
stderrors <- diag(sqrt(solve(-opt$hessian)))
stderrors

# Simulate betas
varcv <- -solve(opt$hessian)
set.seed(02138)
library(mvtnorm)
sims <- rmvnorm(1000, mean = opt$par, sigma = varcv)

# Under probation
predCovs.treatment <- c(1,1,-0.01,0)

# NOT Under Probation
predCovs.control <- c(1,0,0,0.01)

# Get expected values
pred.pi1.male <- apply(sims[1:1000,1:4],1,function(x)
  predCovs.treatment%*%x)
pred.pi0.male <- apply(sims[1:1000,1:4],1,function(x)
  predCovs.control%*%x)

# Plot expected values
plot(density(pred.pi0.male), col="blue",
     main = "Males",
     xlab = "Proportion graduating within 6 years",
     ylim = c(0,20),
     xlim = c(0.5,.8),
     sub= "") 
lines(density(pred.pi1.male), lty=2, col = "red")



#####################
# Female
#####################

# Subset the data
myvars <- c("left_school", "gpalscutoff", 
            "gpaXgpalscutoff", "gpaXgpagrcutoff",
            "male", "totcredits_year1",
            "hsgrade_pct", "age_at_entry", "english",
            "loc_campus1", "loc_campus3", "bpl_north_america",
            "weights", "lowHS", "highHS", "gradin6")
data.for.sim <- sample[myvars]

data.for.sim <- na.omit(data.for.sim)

# Subset for high HS
data.for.sim <- data.for.sim[data.for.sim$male==0,]

# Log-likelihood with weights
llnormal <- function(param,y,x,w) {
  cov <- x %*% param[1:ncol(x)]
  sigma <- exp(param[ncol(x)+1])
  -.5* sum( log(2*pi) + log((sigma^2)/w) + w*((y-cov)^2)/(sigma^2))
}

y <- data.for.sim$gradin6
X <- data.for.sim[,c("gpalscutoff", 
                     "gpaXgpalscutoff", "gpaXgpagrcutoff")]
Xmat <- as.matrix(cbind(1,X))
W <- data.for.sim$weights

opt <- optim(par = rep(0.01,5), fn = llnormal, y=y, x=Xmat,
             w=W,
             method = "BFGS", control = list(fnscale = -1),
             hessian = TRUE)
opt$par
stderrors <- diag(sqrt(solve(-opt$hessian)))
stderrors

# Simulate betas
varcv <- -solve(opt$hessian)
set.seed(02138)
library(mvtnorm)
sims <- rmvnorm(1000, mean = opt$par, sigma = varcv)

# Under probation
predCovs.treatment <- c(1,1,-0.01,0)

# NOT Under Probation
predCovs.control <- c(1,0,0,0.01)

# Get expected values
pred.pi1.female <- apply(sims[1:1000,1:4],1,function(x)
  predCovs.treatment%*%x)
pred.pi0.female <- apply(sims[1:1000,1:4],1,function(x)
  predCovs.control%*%x)

# Plot expected values
plot(density(pred.pi0.female), col="blue",
     main = "Females",
     xlab = "Proportion graduating within 6 years",
     ylim = c(0,20),
     xlim = c(.5,.8),
     sub= "") 
lines(density(pred.pi1.female), lty=2, col = "red")



#####################
# Native English
#####################

# Subset the data
myvars <- c("left_school", "gpalscutoff", 
            "gpaXgpalscutoff", "gpaXgpagrcutoff",
            "male", "totcredits_year1",
            "hsgrade_pct", "age_at_entry", "english",
            "loc_campus1", "loc_campus3", "bpl_north_america",
            "weights", "lowHS", "highHS", "gradin6")
data.for.sim <- sample[myvars]

data.for.sim <- na.omit(data.for.sim)

# Subset for high HS
data.for.sim <- data.for.sim[data.for.sim$english==1,]

# Log-likelihood with weights
llnormal <- function(param,y,x,w) {
  cov <- x %*% param[1:ncol(x)]
  sigma <- exp(param[ncol(x)+1])
  -.5* sum( log(2*pi) + log((sigma^2)/w) + w*((y-cov)^2)/(sigma^2))
}

y <- data.for.sim$gradin6
X <- data.for.sim[,c("gpalscutoff", 
                     "gpaXgpalscutoff", "gpaXgpagrcutoff")]
Xmat <- as.matrix(cbind(1,X))
W <- data.for.sim$weights

opt <- optim(par = rep(0.01,5), fn = llnormal, y=y, x=Xmat,
             w=W,
             method = "BFGS", control = list(fnscale = -1),
             hessian = TRUE)
opt$par
stderrors <- diag(sqrt(solve(-opt$hessian)))
stderrors

# Simulate betas
varcv <- -solve(opt$hessian)
set.seed(02138)
library(mvtnorm)
sims <- rmvnorm(1000, mean = opt$par, sigma = varcv)

# Under probation
predCovs.treatment <- c(1,1,-0.01,0)

# NOT Under Probation
predCovs.control <- c(1,0,0,0.01)

# Get expected values
pred.pi1.english <- apply(sims[1:1000,1:4],1,function(x)
  predCovs.treatment%*%x)
pred.pi0.english <- apply(sims[1:1000,1:4],1,function(x)
  predCovs.control%*%x)

# Plot expected values
plot(density(pred.pi0.english), col="blue",
     main = "Native English",
     xlab = "Proportion graduating within 6 years",
     ylim = c(0,20),
     xlim = c(0.5,.8),
     sub= "") 
lines(density(pred.pi1.english), lty=2, col = "red")



#####################
# Nonnative English
#####################

# Subset the data
myvars <- c("left_school", "gpalscutoff", 
            "gpaXgpalscutoff", "gpaXgpagrcutoff",
            "male", "totcredits_year1",
            "hsgrade_pct", "age_at_entry", "english",
            "loc_campus1", "loc_campus3", "bpl_north_america",
            "weights", "lowHS", "highHS", "gradin6")
data.for.sim <- sample[myvars]

data.for.sim <- na.omit(data.for.sim)


# Subset for high HS
data.for.sim <- data.for.sim[data.for.sim$english==0,]

# Log-likelihood with weights
llnormal <- function(param,y,x,w) {
  cov <- x %*% param[1:ncol(x)]
  sigma <- exp(param[ncol(x)+1])
  -.5* sum( log(2*pi) + log((sigma^2)/w) + w*((y-cov)^2)/(sigma^2))
}

y <- data.for.sim$gradin6
X <- data.for.sim[,c("gpalscutoff", 
                     "gpaXgpalscutoff", "gpaXgpagrcutoff")]
Xmat <- as.matrix(cbind(1,X))
W <- data.for.sim$weights

opt <- optim(par = rep(0.01,5), fn = llnormal, y=y, x=Xmat,
             w=W,
             method = "BFGS", control = list(fnscale = -1),
             hessian = TRUE)
opt$par
stderrors <- diag(sqrt(solve(-opt$hessian)))
stderrors

# Simulate betas
varcv <- -solve(opt$hessian)
set.seed(02138)
library(mvtnorm)
sims <- rmvnorm(1000, mean = opt$par, sigma = varcv)

# Under probation
predCovs.treatment <- c(1,1,-0.01,0)

# NOT Under Probation
predCovs.control <- c(1,0,0,0.01)

# Get expected values
pred.pi1.nonenglish <- apply(sims[1:1000,1:4],1,function(x)
  predCovs.treatment%*%x)
pred.pi0.nonenglish <- apply(sims[1:1000,1:4],1,function(x)
  predCovs.control%*%x)

# Plot expected values
plot(density(pred.pi0.nonenglish), col="blue",
     main = "Nonnative English",
     xlab = "Proportion graduating within 6 years",
     ylim = c(0,20),
     xlim = c(0.5,.8),
     sub= "") 
lines(density(pred.pi1.nonenglish), lty=2, col = "red")



par( mfrow = c( 3, 2 ) )
plot(density(pred.pi0), col="blue",
     main = "All",
     xlab = "Proportion graduating within 6 years",
     ylim = c(0,20),
     xlim = c(0.5,.8),
     sub= "") 
lines(density(pred.pi1), lty=2, col = "red")
#plot.new()
plot(density(pred.pi0.low), col="blue",
     main = "HS grades < median",
     xlab = "Proportion graduating within 6 years",
     ylim = c(0,20),
     xlim = c(0.5,.8),
     sub= "") 
lines(density(pred.pi1.low), lty=2, col = "red")
plot(density(pred.pi0.high), col="blue",
     main = "HS grades > median",
     xlab = "Proportion graduating within 6 years",
     ylim = c(0,20),
     xlim = c(0.5,.8),
     sub= "") 
lines(density(pred.pi1.high), lty=2, col = "red")
plot(density(pred.pi0.male), col="blue",
     main = "Males",
     xlab = "Proportion graduating within 6 years",
     ylim = c(0,20),
     xlim = c(0.5,.8),
     sub= "") 
lines(density(pred.pi1.male), lty=2, col = "red")
plot(density(pred.pi0.female), col="blue",
     main = "Females",
     xlab = "Proportion graduating within 6 years",
     ylim = c(0,20),
     xlim = c(0.5,.8),
     sub= "") 
lines(density(pred.pi1.female), lty=2, col = "red")
plot(density(pred.pi0.english), col="blue",
     main = "Native English",
     xlab = "Proportion graduating within 6 years",
     ylim = c(0,20),
     xlim = c(0.5,.8),
     sub= "") 
lines(density(pred.pi1.english), lty=2, col = "red")
plot(density(pred.pi0.nonenglish), col="blue",
     main = "Nonnative English",
     xlab = "Proportion graduating within 6 years",
     ylim = c(0,20),
     xlim = c(0.5,.8),
     sub= "") 
lines(density(pred.pi1.nonenglish), lty=2, col = "red")
dev.off()
